@hyperlex/mammoth 1.4.10 → 1.4.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41):
  1. package/.eslintrc.json +0 -1
  2. package/.idea/compiler.xml +6 -0
  3. package/.idea/inspectionProfiles/Project_Default.xml +6 -0
  4. package/.idea/mammoth.js.iml +1 -5
  5. package/.idea/vcs.xml +1 -1
  6. package/.idea/workspace.xml +173 -0
  7. package/NEWS +55 -0
  8. package/README.md +39 -18
  9. package/lib/document-to-html.js +3 -0
  10. package/lib/documents.js +2 -0
  11. package/lib/docx/body-reader.js +74 -17
  12. package/lib/docx/numbering-xml.js +27 -4
  13. package/lib/index.d.ts +78 -0
  14. package/lib/index.js +7 -10
  15. package/lib/raw-text.js +14 -0
  16. package/lib/style-reader.js +15 -13
  17. package/lib/styles/document-matchers.js +1 -0
  18. package/lib/zipfile.js +26 -26
  19. package/mammoth.browser.js +10436 -19087
  20. package/mammoth.browser.min.js +21 -18
  21. package/package-lock.json +2654 -0
  22. package/package.json +11 -12
  23. package/test/document-to-html.tests.js +24 -0
  24. package/test/docx/body-reader.tests.js +170 -13
  25. package/test/docx/numbering-xml.tests.js +38 -0
  26. package/test/docx/style-map.tests.js +45 -44
  27. package/test/raw-text.tests.js +61 -0
  28. package/test/style-reader.tests.js +32 -25
  29. package/test/test-data/comments.docx +0 -0
  30. package/test/test-data/footnote-hyperlink.docx +0 -0
  31. package/test/test-data/footnotes.docx +0 -0
  32. package/test/test-data/simple-list.docx +0 -0
  33. package/test/test-data/single-paragraph.docx +0 -0
  34. package/test/test-data/strikethrough.docx +0 -0
  35. package/test/test-data/tables.docx +0 -0
  36. package/test/test-data/text-box.docx +0 -0
  37. package/test/test-data/tiny-picture.docx +0 -0
  38. package/test/test-data/underline.docx +0 -0
  39. package/test/zipfile.tests.js +12 -10
  40. package/.github/ISSUE_TEMPLATE.md +0 -12
  41. package/.travis.yml +0 -10
package/.eslintrc.json CHANGED
@@ -52,7 +52,6 @@
52
52
  "no-spaced-func": ["error"],
53
53
  "no-trailing-spaces": ["error", {"skipBlankLines": true}],
54
54
  "no-whitespace-before-property": ["error"],
55
- "object-curly-spacing": ["error", "never"],
56
55
  "one-var": ["error", "never"],
57
56
  "semi": ["error", "always"],
58
57
  "semi-spacing": ["error", {"before": false}],
package/.idea/compiler.xml ADDED
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="TypeScriptCompiler">
4
+ <option name="memoryLimit" value="10240" />
5
+ </component>
6
+ </project>
package/.idea/inspectionProfiles/Project_Default.xml ADDED
@@ -0,0 +1,6 @@
1
+ <component name="InspectionProjectProfileManager">
2
+ <profile version="1.0">
3
+ <option name="myName" value="Project Default" />
4
+ <inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
5
+ </profile>
6
+ </component>
package/.idea/mammoth.js.iml CHANGED
@@ -1,11 +1,7 @@
1
1
  <?xml version="1.0" encoding="UTF-8"?>
2
2
  <module type="WEB_MODULE" version="4">
3
3
  <component name="NewModuleRootManager">
4
- <content url="file://$MODULE_DIR$">
5
- <excludeFolder url="file://$MODULE_DIR$/temp" />
6
- <excludeFolder url="file://$MODULE_DIR$/.tmp" />
7
- <excludeFolder url="file://$MODULE_DIR$/tmp" />
8
- </content>
4
+ <content url="file://$MODULE_DIR$" />
9
5
  <orderEntry type="inheritedJdk" />
10
6
  <orderEntry type="sourceFolder" forTests="false" />
11
7
  </component>
package/.idea/vcs.xml CHANGED
@@ -1,6 +1,6 @@
1
1
  <?xml version="1.0" encoding="UTF-8"?>
2
2
  <project version="4">
3
3
  <component name="VcsDirectoryMappings">
4
- <mapping directory="$PROJECT_DIR$" vcs="Git" />
4
+ <mapping directory="" vcs="Git" />
5
5
  </component>
6
6
  </project>
package/.idea/workspace.xml ADDED
@@ -0,0 +1,173 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="AutoImportSettings">
4
+ <option name="autoReloadType" value="SELECTIVE" />
5
+ </component>
6
+ <component name="ChangeListManager">
7
+ <list default="true" id="fb2a5579-0030-43eb-b358-b723f29ab6e1" name="Changes" comment="Don't ignore browser build and commit yarn.lock">
8
+ <change beforePath="$PROJECT_DIR$/package.json" beforeDir="false" afterPath="$PROJECT_DIR$/package.json" afterDir="false" />
9
+ <change beforePath="$PROJECT_DIR$/yarn.lock" beforeDir="false" afterPath="$PROJECT_DIR$/yarn.lock" afterDir="false" />
10
+ </list>
11
+ <option name="SHOW_DIALOG" value="false" />
12
+ <option name="HIGHLIGHT_CONFLICTS" value="true" />
13
+ <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
14
+ <option name="LAST_RESOLUTION" value="IGNORE" />
15
+ </component>
16
+ <component name="EmbeddingIndexingInfo">
17
+ <option name="cachedIndexableFilesCount" value="119" />
18
+ <option name="fileBasedEmbeddingIndicesEnabled" value="true" />
19
+ </component>
20
+ <component name="Git.Settings">
21
+ <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
22
+ </component>
23
+ <component name="McpProjectServerCommands">
24
+ <commands />
25
+ <urls />
26
+ </component>
27
+ <component name="ProjectColorInfo"><![CDATA[{
28
+ "associatedIndex": 8
29
+ }]]></component>
30
+ <component name="ProjectId" id="39nIvtlpBCXmNpPviWMaHuovjPb" />
31
+ <component name="ProjectViewState">
32
+ <option name="autoscrollFromSource" value="true" />
33
+ <option name="hideEmptyMiddlePackages" value="true" />
34
+ <option name="showLibraryContents" value="true" />
35
+ </component>
36
+ <component name="PropertiesComponent"><![CDATA[{
37
+ "keyToString": {
38
+ "ModuleVcsDetector.initialDetectionPerformed": "true",
39
+ "RunOnceActivity.MCP Project settings loaded": "true",
40
+ "RunOnceActivity.ShowReadmeOnStart": "true",
41
+ "RunOnceActivity.git.unshallow": "true",
42
+ "RunOnceActivity.typescript.service.memoryLimit.init": "true",
43
+ "com.intellij.ml.llm.matterhorn.ej.ui.settings.DefaultModelSelectionForGA.v1": "true",
44
+ "git-widget-placeholder": "Rebasing master",
45
+ "javascript.preferred.runtime.type.id": "node",
46
+ "junie.onboarding.icon.badge.shown": "true",
47
+ "last_opened_file_path": "/Users/sdiaz/work/mammoth.js",
48
+ "node.js.detected.package.eslint": "true",
49
+ "node.js.detected.package.tslint": "true",
50
+ "node.js.selected.package.eslint": "(autodetect)",
51
+ "node.js.selected.package.tslint": "(autodetect)",
52
+ "nodejs_package_manager_path": "yarn",
53
+ "to.speed.mode.migration.done": "true",
54
+ "vue.rearranger.settings.migration": "true"
55
+ }
56
+ }]]></component>
57
+ <component name="SharedIndexes">
58
+ <attachedChunks>
59
+ <set>
60
+ <option value="bundled-js-predefined-d6986cc7102b-9b0f141eb926-JavaScript-WS-253.30387.83" />
61
+ </set>
62
+ </attachedChunks>
63
+ </component>
64
+ <component name="TaskManager">
65
+ <task active="true" id="Default" summary="Default task">
66
+ <changelist id="fb2a5579-0030-43eb-b358-b723f29ab6e1" name="Changes" comment="" />
67
+ <created>1771325642160</created>
68
+ <option name="number" value="Default" />
69
+ <option name="presentableId" value="Default" />
70
+ <updated>1771325642160</updated>
71
+ <workItem from="1771325643308" duration="3144000" />
72
+ </task>
73
+ <task id="LOCAL-00001" summary="Don't ignore browser build and commit yarn.lock">
74
+ <option name="closed" value="true" />
75
+ <created>1771336874338</created>
76
+ <option name="number" value="00001" />
77
+ <option name="presentableId" value="LOCAL-00001" />
78
+ <option name="project" value="LOCAL" />
79
+ <updated>1771336874338</updated>
80
+ </task>
81
+ <task id="LOCAL-00002" summary="Publish package to npm registry">
82
+ <option name="closed" value="true" />
83
+ <created>1771337011188</created>
84
+ <option name="number" value="00002" />
85
+ <option name="presentableId" value="LOCAL-00002" />
86
+ <option name="project" value="LOCAL" />
87
+ <updated>1771337011188</updated>
88
+ </task>
89
+ <task id="LOCAL-00003" summary="Publish package to npm registry">
90
+ <option name="closed" value="true" />
91
+ <created>1771337081623</created>
92
+ <option name="number" value="00003" />
93
+ <option name="presentableId" value="LOCAL-00003" />
94
+ <option name="project" value="LOCAL" />
95
+ <updated>1771337081623</updated>
96
+ </task>
97
+ <task id="LOCAL-00004" summary="Don't ignore browser build and commit yarn.lock">
98
+ <option name="closed" value="true" />
99
+ <created>1771337138560</created>
100
+ <option name="number" value="00004" />
101
+ <option name="presentableId" value="LOCAL-00004" />
102
+ <option name="project" value="LOCAL" />
103
+ <updated>1771337138560</updated>
104
+ </task>
105
+ <task id="LOCAL-00005" summary="Don't ignore browser build and commit yarn.lock">
106
+ <option name="closed" value="true" />
107
+ <created>1771337146350</created>
108
+ <option name="number" value="00005" />
109
+ <option name="presentableId" value="LOCAL-00005" />
110
+ <option name="project" value="LOCAL" />
111
+ <updated>1771337146350</updated>
112
+ </task>
113
+ <task id="LOCAL-00006" summary="Don't ignore browser build and commit yarn.lock">
114
+ <option name="closed" value="true" />
115
+ <created>1771337192606</created>
116
+ <option name="number" value="00006" />
117
+ <option name="presentableId" value="LOCAL-00006" />
118
+ <option name="project" value="LOCAL" />
119
+ <updated>1771337192606</updated>
120
+ </task>
121
+ <option name="localTasksCounter" value="7" />
122
+ <servers />
123
+ </component>
124
+ <component name="TypeScriptGeneratedFilesManager">
125
+ <option name="version" value="3" />
126
+ </component>
127
+ <component name="Vcs.Log.Tabs.Properties">
128
+ <option name="RECENT_FILTERS">
129
+ <map>
130
+ <entry key="Branch">
131
+ <value>
132
+ <list>
133
+ <RecentGroup>
134
+ <option name="FILTER_VALUES">
135
+ <option value="HEAD" />
136
+ </option>
137
+ </RecentGroup>
138
+ </list>
139
+ </value>
140
+ </entry>
141
+ </map>
142
+ </option>
143
+ <option name="TAB_STATES">
144
+ <map>
145
+ <entry key="MAIN">
146
+ <value>
147
+ <State>
148
+ <option name="FILTERS">
149
+ <map>
150
+ <entry key="branch">
151
+ <value>
152
+ <list>
153
+ <option value="HEAD" />
154
+ </list>
155
+ </value>
156
+ </entry>
157
+ </map>
158
+ </option>
159
+ </State>
160
+ </value>
161
+ </entry>
162
+ </map>
163
+ </option>
164
+ </component>
165
+ <component name="VcsManagerConfiguration">
166
+ <option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="false" />
167
+ <option name="CHECK_NEW_TODO" value="false" />
168
+ <MESSAGE value="Publish package to npm registry" />
169
+ <MESSAGE value="Dynamic styleMaps for paragraphs&#10;&#10;# Conflicts:&#10;#&#9;mammoth.browser.js&#10;#&#9;mammoth.browser.min.js" />
170
+ <MESSAGE value="Don't ignore browser build and commit yarn.lock" />
171
+ <option name="LAST_COMMIT_MESSAGE" value="Don't ignore browser build and commit yarn.lock" />
172
+ </component>
173
+ </project>
package/NEWS CHANGED
@@ -1,3 +1,58 @@
1
+ # 1.4.21
2
+
3
+ * Ignore w:u elements when w:val is missing.
4
+
5
+ # 1.4.20
6
+
7
+ * Emit warning instead of throwing exception when image file cannot be found for
8
+ a:blip elements.
9
+
10
+ # 1.4.19
11
+
12
+ * Add TypeScript declarations.
13
+
14
+ # 1.4.18
15
+
16
+ * When extracting raw text, convert tab elements to tab characters.
17
+
18
+ * Handle internal hyperlinks created with complex fields.
19
+
20
+ * Update JSZip to 3.2.0. This addresses CVE-2021-23413 in JSZip.
21
+
22
+ # 1.4.17
23
+
24
+ * Handle w:num with invalid w:abstractNumId.
25
+ * Update underscore to 1.13.1.
26
+
27
+ # 1.4.16
28
+
29
+ * Convert symbols in supported fonts to corresponding Unicode characters.
30
+
31
+ # 1.4.15
32
+
33
+ * Support numbering defined by paragraph style.
34
+
35
+ # 1.4.14
36
+
37
+ * Add style mapping for all caps.
38
+
39
+ # 1.4.13
40
+
41
+ * Use package-lock.json instead of npm-shrinkwrap.json.
42
+
43
+ # 1.4.12
44
+
45
+ * Handle underline elements where w:val is "none".
46
+
47
+ # 1.4.11
48
+
49
+ * Re-publishing to remove superfluous files.
50
+
51
+ # 1.4.10
52
+
53
+ * Read font size for runs.
54
+ * Support soft hyphens.
55
+
1
56
  # 1.4.9
2
57
 
3
58
  * Allow hyperlinks to be collapsed.
package/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Mammoth .docx to HTML converter
2
2
 
3
3
  Mammoth is designed to convert .docx documents,
4
- such as those created by Microsoft Word,
4
+ such as those created by Microsoft Word, Google Docs and LibreOffice,
5
5
  and convert them to HTML.
6
6
  Mammoth aims to produce simple and clean HTML by using semantic information in the document,
7
7
  and ignoring other details.
@@ -104,18 +104,19 @@ Where `custom-style-map` looks something like:
104
104
  p[style-name='Aside Heading'] => div.aside > h2:fresh
105
105
  p[style-name='Aside Text'] => div.aside > p:fresh
106
106
 
107
- Lines beginning with `#` will be ignored.
107
+ A description of the syntax for style maps can be found in the section ["Writing style maps"](#writing-style-maps).
108
108
 
109
109
  #### Markdown
110
110
 
111
+ Markdown support is deprecated.
112
+ Generating HTML and using a separate library to convert the HTML to Markdown is recommended,
113
+ and is likely to produce better results.
114
+
111
115
  Using `--output-format=markdown` will cause Markdown to be generated.
112
116
  For instance:
113
117
 
114
118
  mammoth document.docx --output-format=markdown
115
119
 
116
- Markdown support is still in its early stages,
117
- so you may find some features are unsupported.
118
-
119
120
  ### Library
120
121
 
121
122
  In node.js, mammoth can be required in the usual way:
@@ -167,7 +168,7 @@ Mammoth maps some common .docx styles to HTML elements.
167
168
  For instance,
168
169
  a paragraph with the style name `Heading 1` is converted to a `h1` element.
169
170
  You can pass in a custom map for styles by passing an options object with a `styleMap` property as a second argument to `convertToHtml`.
170
- A description of the syntax for style maps can be found in the section "Writing style maps".
171
+ A description of the syntax for style maps can be found in the section ["Writing style maps"](#writing-style-maps).
171
172
  For instance, if paragraphs with the style name `Section Title` should be converted to `h1` elements,
172
173
  and paragraphs with the style name `Subsection Title` should be converted to `h2` elements:
173
174
 
@@ -343,7 +344,7 @@ Converts the source document to HTML.
343
344
  ignoring blank lines and lines starting with `#`:
344
345
  If `options.styleMap` is an array,
345
346
  each element is expected to be a string representing a single style mapping.
346
- See "Writing style maps" for a reference to the syntax for style maps.
347
+ See ["Writing style maps"](#writing-style-maps) for a reference to the syntax for style maps.
347
348
 
348
349
  * `includeEmbeddedStyleMap`: by default,
349
350
  if the document contains an embedded style map, then it is combined with the default style map.
@@ -543,10 +544,12 @@ var options = {
543
544
  Or if you want paragraphs that have been explicitly set to use monospace fonts to represent code:
544
545
 
545
546
  ```javascript
547
+ const monospaceFonts = ["consolas", "courier", "courier new"];
548
+
546
549
  function transformParagraph(paragraph) {
547
550
  var runs = mammoth.transforms.getDescendantsOfType(paragraph, "run");
548
551
  var isMatch = runs.length > 0 && runs.every(function(run) {
549
- return run.font && fonts.indexOf(run.font.toLowerCase()) !== -1;
552
+ return run.font && monospaceFonts.indexOf(run.font.toLowerCase()) !== -1;
550
553
  });
551
554
  if (isMatch) {
552
555
  return {
@@ -624,7 +627,7 @@ You can specify this by using the `:fresh` modifier:
624
627
 
625
628
  `p[style-name='Heading 1'] => h1:fresh`
626
629
 
627
- The two consective `Heading 1` .docx paragraphs will then be converted to two separate `h1` elements.
630
+ The two consecutive `Heading 1` .docx paragraphs will then be converted to two separate `h1` elements.
628
631
 
629
632
  Reusing elements is useful in generating more complicated HTML structures.
630
633
  For instance, suppose your .docx contains asides.
@@ -725,6 +728,17 @@ strike
725
728
  Note that this matches text that has had strikethrough explicitly applied to it.
726
729
  It will not match any text that is struckthrough because of its paragraph or run style.
727
730
 
731
+ #### All caps
732
+
733
+ Match explicitly all caps text:
734
+
735
+ ```
736
+ all-caps
737
+ ```
738
+
739
+ Note that this matches text that has had all caps explicitly applied to it.
740
+ It will not match any text that is all caps because of its paragraph or run style.
741
+
728
742
  #### Small caps
729
743
 
730
744
  Match explicitly small caps text:
@@ -736,6 +750,15 @@ small-caps
736
750
  Note that this matches text that has had small caps explicitly applied to it.
737
751
  It will not match any text that is small caps because of its paragraph or run style.
738
752
 
753
+ #### Ignoring document elements
754
+
755
+ Use `!` to ignore a document element.
756
+ For instance, to ignore any paragraph with the style `Comment`:
757
+
758
+ ```
759
+ p[style-name='Comment'] => !
760
+ ```
761
+
739
762
  ### HTML paths
740
763
 
741
764
  #### Single elements
@@ -798,15 +821,6 @@ div.aside > h2
798
821
 
799
822
  You can nest elements to any depth.
800
823
 
801
- #### Ignoring document elements
802
-
803
- Use `!` to ignore a document element.
804
- For instance, to ignore any paragraph with the style `Comment`:
805
-
806
- ```
807
- p[style-name='Comment'] => !
808
- ```
809
-
810
824
  ## Upgrading to later versions
811
825
 
812
826
  ### 1.0.0
@@ -881,3 +895,10 @@ Thanks to the following people for their contributions to Mammoth:
881
895
  * [Jacob Wang](https://github.com/jaceyshome)
882
896
 
883
897
  * Supporting styles defined without names
898
+
899
+ ## Donations
900
+
901
+ If you'd like to say thanks, feel free to [make a donation through Ko-fi](https://ko-fi.com/S6S01MG20).
902
+
903
+ If you use Mammoth as part of your business,
904
+ please consider supporting the ongoing maintenance of Mammoth by [making a weekly donation through Liberapay](https://liberapay.com/mwilliamson/donate).
package/lib/document-to-html.js CHANGED
@@ -130,6 +130,9 @@ function DocumentConversion(options, comments) {
130
130
  if (run.isSmallCaps) {
131
131
  paths.push(findHtmlPathForRunProperty("smallCaps"));
132
132
  }
133
+ if (run.isAllCaps) {
134
+ paths.push(findHtmlPathForRunProperty("allCaps"));
135
+ }
133
136
  if (run.isStrikethrough) {
134
137
  paths.push(findHtmlPathForRunProperty("strikethrough", "s"));
135
138
  }
package/lib/documents.js CHANGED
@@ -73,9 +73,11 @@ function Run(children, properties) {
73
73
  isUnderline: properties.isUnderline,
74
74
  isItalic: properties.isItalic,
75
75
  isStrikethrough: properties.isStrikethrough,
76
+ isAllCaps: properties.isAllCaps,
76
77
  isSmallCaps: properties.isSmallCaps,
77
78
  verticalAlignment: properties.verticalAlignment || verticalAlignment.baseline,
78
79
  font: properties.font || null,
80
+ fontSize: properties.fontSize || null,
79
81
  size: properties.size || null,
80
82
  color: properties.color || null,
81
83
  highlight: properties.highlight || null
package/lib/docx/body-reader.js CHANGED
@@ -1,6 +1,7 @@
1
1
  exports.createBodyReader = createBodyReader;
2
2
  exports._readNumberingProperties = readNumberingProperties;
3
3
 
4
+ var dingbatToUnicode = require("dingbat-to-unicode");
4
5
  var _ = require("underscore");
5
6
 
6
7
  var documents = require("../documents");
@@ -85,17 +86,23 @@ function BodyReader(options) {
85
86
 
86
87
  function readRunProperties(element) {
87
88
  return readRunStyle(element).map(function(style) {
89
+ var fontSizeString = element.firstOrEmpty("w:sz").attributes["w:val"];
90
+ // w:sz gives the font size in half points, so halve the value to get the size in points
91
+ var fontSize = /^[0-9]+$/.test(fontSizeString) ? parseInt(fontSizeString, 10) / 2 : null;
92
+
88
93
  return {
89
94
  type: "runProperties",
90
95
  styleId: style.styleId,
91
96
  styleName: style.name,
92
97
  verticalAlignment: element.firstOrEmpty("w:vertAlign").attributes["w:val"],
93
98
  font: element.firstOrEmpty("w:rFonts").attributes["w:ascii"],
99
+ fontSize: fontSize,
94
100
  size: element.firstOrEmpty("w:sz").attributes["w:val"],
95
101
  isBold: readBooleanElement(element.first("w:b")),
96
- isUnderline: readBooleanElement(element.first("w:u")),
102
+ isUnderline: readUnderline(element.first("w:u")),
97
103
  isItalic: readBooleanElement(element.first("w:i")),
98
104
  isStrikethrough: readBooleanElement(element.first("w:strike")),
105
+ isAllCaps: readBooleanElement(element.first("w:caps")),
99
106
  isSmallCaps: readBooleanElement(element.first("w:smallCaps")),
100
107
  color: element.firstOrEmpty("w:color").attributes["w:val"],
101
108
  highlight: element.firstOrEmpty("w:highlight").attributes["w:val"]
@@ -103,6 +110,15 @@ function BodyReader(options) {
103
110
  });
104
111
  }
105
112
 
113
+ function readUnderline(element) {
114
+ if (element) {
115
+ var value = element.attributes["w:val"];
116
+ return value !== undefined && value !== "false" && value !== "0" && value !== "none";
117
+ } else {
118
+ return false;
119
+ }
120
+ }
121
+
106
122
  function readBooleanElement(element) {
107
123
  if (element) {
108
124
  var value = element.attributes["w:val"];
@@ -153,28 +169,33 @@ function BodyReader(options) {
153
169
  } else if (type === "end") {
154
170
  complexFieldStack.pop();
155
171
  } else if (type === "separate") {
156
- var href = parseHyperlinkFieldCode(currentInstrText.join(''));
157
- var complexField = href === null ? unknownComplexField : {type: "hyperlink", href: href};
172
+ var hyperlinkOptions = parseHyperlinkFieldCode(currentInstrText.join(''));
173
+ var complexField = hyperlinkOptions === null ? unknownComplexField : {type: "hyperlink", options: hyperlinkOptions};
158
174
  complexFieldStack.pop();
159
175
  complexFieldStack.push(complexField);
160
176
  }
161
177
  return emptyResult();
162
178
  }
163
179
 
164
- function currentHyperlinkHref() {
180
+ function currentHyperlinkOptions() {
165
181
  var topHyperlink = _.last(complexFieldStack.filter(function(complexField) {
166
182
  return complexField.type === "hyperlink";
167
183
  }));
168
- return topHyperlink ? topHyperlink.href : null;
184
+ return topHyperlink ? topHyperlink.options : null;
169
185
  }
170
186
 
171
187
  function parseHyperlinkFieldCode(code) {
172
- var result = /\s*HYPERLINK "(.*)"/.exec(code);
173
- if (result) {
174
- return result[1];
175
- } else {
176
- return null;
188
+ var externalLinkResult = /\s*HYPERLINK "(.*)"/.exec(code);
189
+ if (externalLinkResult) {
190
+ return {href: externalLinkResult[1]};
191
+ }
192
+
193
+ var internalLinkResult = /\s*HYPERLINK\s+\\l\s+"(.*)"/.exec(code);
194
+ if (internalLinkResult) {
195
+ return {anchor: internalLinkResult[1]};
177
196
  }
197
+
198
+ return null;
178
199
  }
179
200
 
180
201
  function readInstrText(element) {
@@ -182,6 +203,24 @@ function BodyReader(options) {
182
203
  return emptyResult();
183
204
  }
184
205
 
206
+ function readSymbol(element) {
207
+ // See 17.3.3.30 sym (Symbol Character) of ECMA-376 4th edition Part 1
208
+ var font = element.attributes["w:font"];
209
+ var char = element.attributes["w:char"];
210
+ var unicodeCharacter = dingbatToUnicode.hex(font, char);
211
+ if (unicodeCharacter == null && /^F0..$/.test(char)) {
212
+ unicodeCharacter = dingbatToUnicode.hex(font, char.substring(2));
213
+ }
214
+
215
+ if (unicodeCharacter == null) {
216
+ return emptyResultWithMessages([warning(
217
+ "A w:sym element with an unsupported character was ignored: char " + char + " in font " + font
218
+ )]);
219
+ } else {
220
+ return elementResult(new documents.Text(unicodeCharacter.string));
221
+ }
222
+ }
223
+
185
224
  function noteReferenceReader(noteType) {
186
225
  return function(element) {
187
226
  var noteId = element.attributes["w:id"];
@@ -221,7 +260,7 @@ function BodyReader(options) {
221
260
  styleId: style.styleId,
222
261
  styleName: style.name,
223
262
  alignment: element.firstOrEmpty("w:jc").attributes["w:val"],
224
- numbering: readNumberingProperties(element.firstOrEmpty("w:numPr"), numbering),
263
+ numbering: readNumberingProperties(style.styleId, element.firstOrEmpty("w:numPr"), numbering),
225
264
  indent: readParagraphIndent(element.firstOrEmpty("w:ind")),
226
265
  spacing: readParagraphSpacing(element.firstOrEmpty("w:spacing")),
227
266
  border: readParagraphBorders(element.firstOrEmpty("w:pBdr"))
@@ -234,9 +273,9 @@ function BodyReader(options) {
234
273
  var properties = _.find(children, isRunProperties);
235
274
  children = children.filter(negate(isRunProperties));
236
275
 
237
- var hyperlinkHref = currentHyperlinkHref();
238
- if (hyperlinkHref !== null) {
239
- children = [new documents.Hyperlink(children, {href: hyperlinkHref})];
276
+ var hyperlinkOptions = currentHyperlinkOptions();
277
+ if (hyperlinkOptions !== null) {
278
+ children = [new documents.Hyperlink(children, hyperlinkOptions)];
240
279
  }
241
280
 
242
281
  return new documents.Run(children, properties);
@@ -254,6 +293,10 @@ function BodyReader(options) {
254
293
  "w:noBreakHyphen": function() {
255
294
  return elementResult(new documents.Text("\u2011"));
256
295
  },
296
+ "w:softHyphen": function(element) {
297
+ return elementResult(new documents.Text("\u00AD"));
298
+ },
299
+ "w:sym": readSymbol,
257
300
  "w:hyperlink": function(element) {
258
301
  var relationshipId = element.attributes["r:id"];
259
302
  var anchor = element.attributes["w:anchor"];
@@ -451,7 +494,12 @@ function BodyReader(options) {
451
494
  function readBlip(element, blip) {
452
495
  var properties = element.first("wp:docPr").attributes;
453
496
  var altText = isBlank(properties.descr) ? properties.title : properties.descr;
454
- return readImage(findBlipImageFile(blip), altText);
497
+ var blipImageFile = findBlipImageFile(blip);
498
+ if (blipImageFile === null) {
499
+ return emptyResultWithMessages([warning("Could not find image file for a:blip element")]);
500
+ } else {
501
+ return readImage(blipImageFile, altText);
502
+ }
455
503
  }
456
504
 
457
505
  function isBlank(value) {
@@ -463,12 +511,14 @@ function BodyReader(options) {
463
511
  var linkRelationshipId = blip.attributes["r:link"];
464
512
  if (embedRelationshipId) {
465
513
  return findEmbeddedImageFile(embedRelationshipId);
466
- } else {
514
+ } else if (linkRelationshipId) {
467
515
  var imagePath = relationships.findTargetByRelationshipId(linkRelationshipId);
468
516
  return {
469
517
  path: imagePath,
470
518
  read: files.read.bind(files, imagePath)
471
519
  };
520
+ } else {
521
+ return null;
472
522
  }
473
523
  }
474
524
 
@@ -512,7 +562,14 @@ function BodyReader(options) {
512
562
  }
513
563
 
514
564
 
515
- function readNumberingProperties(element, numbering) {
565
+ function readNumberingProperties(styleId, element, numbering) {
566
+ if (styleId != null) {
567
+ var levelByStyleId = numbering.findLevelByParagraphStyleId(styleId);
568
+ if (levelByStyleId != null) {
569
+ return levelByStyleId;
570
+ }
571
+ }
572
+
516
573
  var level = element.firstOrEmpty("w:ilvl").attributes["w:val"];
517
574
  var numId = element.firstOrEmpty("w:numId").attributes["w:val"];
518
575
  if (level === undefined || numId === undefined) {